#!/bin/bash

#PBS -l nodes=1:ppn=8,walltime=5:00:00
#PBS -N make_exomes_full_parallel
#PBS -M cmb433@nyu.edu
#PBS -m abe
#PBS -e localhost:/scratch/cmb433/xspecies-exome/${PBS_JOBNAME}.e${PBS_JOBID}.${PBS_ARRAYID}
#PBS -o localhost:/scratch/cmb433/xspecies-exome/${PBS_JOBNAME}.o${PBS_JOBID}.${PBS_ARRAYID}

# ------------------------------------------------------------------------------
# Variables
# ------------------------------------------------------------------------------

# Directory the job runs from; the relative scripts/ and data/ paths used by
# the pipeline are resolved against it.
working_dir=/scratch/cmb433/xspecies-exome

# ------------------------------------------------------------------------------
# Run pipeline
# ------------------------------------------------------------------------------

# Abort if the working directory cannot be entered, rather than letting the
# pipeline run from whatever directory the job happened to start in.
# The message goes to stderr so it never ends up in the job's stdout file.
cd "$working_dir" || {
	echo "ERROR: cannot cd to working directory: $working_dir" >&2
	exit 1
}

# There are 5017 sequences to run in the full dataset.
# Submit as an array job, one task per chunk of sequences (the step after the
# colon must match TO_DO below):
# qsub -t 1-5017:100 pbs/make_exomes_full_parallel.pbs

# Then recombine output with:
# cat `ls -v *full_parallel*.o*` > fake_human_exomes_full.seq

# Number of sequences each array task processes.
TO_DO=100

# PBS_ARRAYID is read from the environment; it is expected to be set by the
# scheduler when the job is submitted as an array job (qsub -t).
# Without it START would be empty and END would silently become TO_DO - 1,
# so fail fast instead. The message goes to stderr to keep stdout clean.
if ! [[ "${PBS_ARRAYID:-}" =~ ^(0|[1-9][0-9]*)$ ]]; then
	echo "ERROR: PBS_ARRAYID is unset or not a non-negative integer" \
		"(got '${PBS_ARRAYID:-}'); submit this script with qsub -t" >&2
	exit 1
fi

# Inclusive range of sequence indices handled by this task.
# NOTE(review): END is not clamped to the dataset size, so the last task's END
# can exceed the number of sequences -- confirm the perl script tolerates that.
START=$PBS_ARRAYID
END=$((START + TO_DO - 1))


# Run the exome-building script on this task's slice of the dataset
# (START..END). Input paths are relative to the current directory.
# START and END are quoted so that an empty value cannot vanish from the
# argument list and shift the following option into its place.
perl scripts/make_fake_human_exomes.pl \
	--target_fasta data/hg18_gphocs_targets.full.fa \
	--liftover_path ~/exome_macaque/bin/liftover/ \
	--hg_to_pan_chain ~/exome_macaque/bin/liftover/hg18ToPanTro2.over.chain \
	--chimp_seqs_fasta data/panTro2_gphocs_targets.full.fa \
	--start "$START" --end "$END"

# A bare exit returns the status of the last command, i.e. perl's exit code,
# so a failed run is reported to the scheduler as a failed job.
exit
